/*
 * Copyright 2008 Carnegie Mellon University
 * Licensed under the Apache License, Version 2.0 (the "License"); 
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *  
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, 
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package edu.cmu.lti.sepia.task.nlp.japanese;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Helpers for cleaning up Japanese numeric expressions.
 */
public class NumberUtil {

    /**
     * Characters treated as part of a number: kanji numerals (including the
     * formal variants 零壱弐参拾), the unit characters 十百千万億兆, full-width
     * digits, and the full-width separators "．", "・" and "，".
     */
    private static final String NUMERAL_CHARS =
            "零壱弐参拾〇一二三四五六七八九十百千万億兆０１２３４５６７８９．・，";

    /**
     * Matches exactly one ASCII space that sits directly between two numeral
     * characters. Lookbehind/lookahead are used so that only the space itself
     * is consumed; the surrounding text is never copied into a replacement
     * string, where '$' and '\' would be interpreted as regex replacement
     * metacharacters.
     */
    private static final Pattern pNumberReformat = Pattern.compile(
            "(?<=[" + NUMERAL_CHARS + "])[ ](?=[" + NUMERAL_CHARS + "])");

    /**
     * Removes every ASCII question mark from {@code word} and then deletes each
     * single ASCII space that is immediately preceded and immediately followed
     * by a numeral character, so that a number split by spaces is joined back
     * together (for example "一 二 三" becomes "一二三").
     *
     * <p>Runs of two or more spaces between numerals, and spaces adjacent to
     * any non-numeral character, are left untouched.
     *
     * @param word the text to clean up
     * @return the text with question marks removed and numerals re-joined
     * @throws NullPointerException if {@code word} is {@code null}
     */
    public static String reformatNumber(String word) {
        // NOTE(review): '?' is presumably a placeholder left by an upstream
        // encoding/tokenization step -- confirm against callers.
        String withoutQuestionMarks = word.replace("?", "");

        // Deleting one numeral-flanked space never changes whether another
        // space is numeral-flanked, so a single pass reaches the same result
        // as repeatedly removing the first match until none remains.
        return pNumberReformat.matcher(withoutQuestionMarks).replaceAll("");
    }

}
